"""Gallery of example charts built from a small, hard-coded movie dataset.

The script was recovered from a notebook export; each ``# ---`` section below
corresponds to one notebook cell and renders one figure.

Third-party requirements (install from a shell, e.g. ``pip install squarify``):
pandas, numpy, matplotlib, seaborn, squarify, scipy, plotly and, optionally,
matplotlib-venn for the Venn diagram section.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import squarify
from scipy.cluster import hierarchy
from scipy.spatial.distance import pdist

# venn2 is only needed by the Venn diagram section; keep the rest of the
# gallery usable when matplotlib-venn is not installed.
try:
    from matplotlib_venn import venn2
except ImportError:
    venn2 = None


# --- Data ------------------------------------------------------------------
# Create a DataFrame with movie data
data = {
    'Title': ['Avatar', 'Titanic', 'Star Wars: The Force Awakens', 'The Lion King', 'Inception'],
    'Genre': ['Action', 'Romance', 'Sci-Fi', 'Animation', 'Sci-Fi'],
    'Year': [2009, 1997, 2015, 2019, 2010],
    'Rating': [7.8, 7.8, 7.9, 6.9, 8.8],
    'Revenue': [2787965087, 2187463944, 2068223624, 1656943394, 828322032],
}
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Number of movies per genre (descending); reused by several sections below.
genre_counts = df['Genre'].value_counts()


# --- Bar graph -------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.barplot(x='Genre', y='Rating', data=df)
plt.title("Bar Graph")
plt.xlabel('Genre')
plt.ylabel('Average Rating')
plt.show()


# --- Correlation heatmap ---------------------------------------------------
# Compute the correlation matrix of the numeric columns
correlation_matrix = df[['Rating', 'Revenue', 'Year']].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()


# --- Radial histogram ------------------------------------------------------
# Count the number of movies released each year
year_counts = df['Year'].value_counts().sort_index()

# Give every year an equal angular slot. Scaling the raw year by the maximum
# year would squeeze all bars into a few degrees next to 2*pi and make them
# overlap, so the bar position is derived from the year's rank instead.
bar_width = 2 * np.pi / len(year_counts)
bar_angles = np.arange(len(year_counts)) * bar_width

plt.figure(figsize=(8, 8))
ax = plt.subplot(111, polar=True)
ax.bar(bar_angles, year_counts.values, width=bar_width, align='edge')
ax.set_xticks(bar_angles + bar_width / 2)  # Label each bar at its centre
ax.set_xticklabels([str(year) for year in year_counts.index])
ax.set_theta_direction(-1)  # Make the bars go clockwise
ax.set_theta_zero_location('N')  # Start the bars from the top
ax.set_title('Radial Histogram of Movies Released Each Year', va='bottom')
plt.show()


# --- Treemap ---------------------------------------------------------------
# Sum the revenue for each genre and sort in descending order
genre_revenue = (
    df.groupby('Genre')['Revenue']
    .sum()
    .reset_index()
    .sort_values(by='Revenue', ascending=False)
)

plt.figure(figsize=(10, 8))
squarify.plot(sizes=genre_revenue['Revenue'], label=genre_revenue['Genre'], alpha=0.8)
plt.axis('off')
plt.title('Treemap')
plt.show()


# --- Spiral-like plot ------------------------------------------------------
year_counts = df['Year'].value_counts().sort_index()

# Calculate the angle for each year
angles = np.linspace(0, 2 * np.pi, len(year_counts), endpoint=False)

plt.figure(figsize=(8, 8))
ax = plt.subplot(111, polar=True)
ax.plot(angles, year_counts.values)
ax.set_theta_direction(-1)  # Make the plot go clockwise
ax.set_theta_zero_location('N')  # Start the plot from the top
ax.set_title('Spiral-like Histogram of Movies Released Each Year', va='bottom')
plt.show()


# --- Multilevel pie chart --------------------------------------------------
# One row per (genre, year) pair, sorted by genre and then year.
genre_year_counts = df.groupby(['Genre', 'Year']).size().reset_index(name='Count')
# Per-genre totals in the same genre order as the rows above, so that the
# wedges of the two levels line up.
genre_totals = genre_year_counts.groupby('Genre', sort=False)['Count'].sum()

plt.figure(figsize=(10, 8))
# Inner level: one wedge per genre.
plt.pie(
    genre_totals,
    labels=genre_totals.index.tolist(),
    autopct='%1.1f%%',
    startangle=140,
    labeldistance=0.75,
    pctdistance=0.45,
)
# Outer level: a single ring with one wedge per (genre, year) pair. Both
# levels share the total and the start angle, so every year wedge sits
# directly outside its genre (drawing a separate full pie per genre would
# stack complete circles on top of each other instead).
plt.pie(
    genre_year_counts['Count'],
    labels=genre_year_counts['Year'].astype(str).tolist(),
    radius=1.2,
    startangle=140,
    labeldistance=1.05,
    wedgeprops=dict(width=0.2, edgecolor='white'),
)
plt.axis('equal')
plt.title('Multilevel Pie Chart')
plt.show()


# --- Pyramid diagram -------------------------------------------------------
genres = genre_counts.index
counts = genre_counts.values
positions = range(len(genres))

plt.figure(figsize=(10, 8))
# Bars extending towards positive x
plt.barh(positions, counts, color='skyblue')
# Mirrored bars extending towards negative x
plt.barh(positions, -counts, color='salmon')
# Customize y-axis labels
plt.yticks(positions, genres)
plt.xlabel('Number of Movies')
plt.title('Pyramid Diagram')
plt.grid(False)
plt.show()


# --- Layered area chart ----------------------------------------------------
genre_year_revenue = df.groupby(['Genre', 'Year'])['Revenue'].sum().reset_index()

# Pivot the data to have genres as columns and years as indices
pivot_df = genre_year_revenue.pivot(index='Year', columns='Genre', values='Revenue').fillna(0)

plt.figure(figsize=(10, 6))
for genre in pivot_df.columns:
    plt.fill_between(pivot_df.index, pivot_df[genre], label=genre, alpha=0.8)
plt.xlabel('Year')
plt.ylabel('Revenue')
plt.title('Layered Area Chart ')
plt.legend()
plt.show()


# --- Histogram -------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.histplot(df['Rating'], bins=10, kde=True)
plt.title('Histogram')
plt.xlabel('Rating')
plt.ylabel('Frequency')
plt.show()


# --- Line plot -------------------------------------------------------------
yearly_revenue = df.groupby('Year')['Revenue'].sum().reset_index()

plt.figure(figsize=(8, 6))
sns.lineplot(x='Year', y='Revenue', data=yearly_revenue)
plt.title('Line plot')
plt.xlabel('Year')
plt.ylabel('Total Revenue')
plt.show()


# --- Scatter graph ---------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(x='Rating', y='Revenue', data=df)
plt.title('Scatter Graph')
plt.xlabel('Rating')
plt.ylabel('Revenue')
plt.show()


# --- Box plot --------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.boxplot(x='Genre', y='Rating', data=df)
plt.title('Box Plot')
plt.xlabel('Genre')
plt.ylabel('Rating')
plt.show()


# --- Dot chart -------------------------------------------------------------
# Average rating and revenue for each genre
genre_stats = df.groupby('Genre').agg({'Rating': 'mean', 'Revenue': 'mean'}).reset_index()

plt.figure(figsize=(10, 6))
plt.scatter(genre_stats['Rating'], genre_stats['Revenue'], s=100, c='skyblue', edgecolors='black', alpha=0.7)
plt.xlabel('Average Rating')
plt.ylabel('Average Revenue')
plt.title('Dot Chart')
# Add a genre label next to each point
for rating, revenue, genre in zip(genre_stats['Rating'], genre_stats['Revenue'], genre_stats['Genre']):
    plt.text(rating, revenue, genre, fontsize=10, ha='left', va='center')
plt.grid(True)
plt.show()


# --- Count plot ------------------------------------------------------------
plt.figure(figsize=(10, 6))
sns.countplot(x='Genre', data=df, palette='viridis')
plt.title('Count Plot')
plt.xlabel('Genre')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.tight_layout()  # Adjust layout to prevent labels from being cut off
plt.show()


# --- Profile map -----------------------------------------------------------
plt.figure(figsize=(10, 6))
genre_counts.plot(kind='bar', color='skyblue')
plt.title('Profile Map')
plt.xlabel('Genre')
plt.ylabel('Number of Movies')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.tight_layout()  # Adjust layout to prevent labels from being cut off
plt.show()


# --- Dendrogram ------------------------------------------------------------
# pdist needs purely numeric input: drop the free-text Title column, one-hot
# encode Genre and cast everything (including the dummy columns) to float.
# NOTE(review): the features are not scaled, so Revenue presumably dominates
# the Euclidean distances - consider standardising the columns first.
df_encoded = pd.get_dummies(df.drop(columns=['Title']), columns=['Genre']).astype(float)

# Compute the condensed pairwise distance matrix
distance_matrix = pdist(df_encoded.to_numpy())

# Perform hierarchical clustering
Z = hierarchy.linkage(distance_matrix, method='ward')

plt.figure(figsize=(12, 6))
hierarchy.dendrogram(Z, labels=df.index.tolist(), leaf_rotation=90)
plt.title('Dendrogram ')
plt.xlabel('Movies')
plt.ylabel('Distance')
plt.show()


# --- Venn diagram ----------------------------------------------------------
# Define hypothetical rating categories
high_rating = set(df[df['Rating'] >= 8]['Title'])
low_rating = set(df[df['Rating'] < 5]['Title'])

if venn2 is None:
    print('Skipping Venn diagram: the matplotlib-venn package is not installed.')
else:
    plt.figure(figsize=(8, 6))
    venn2([high_rating, low_rating], ('High Rating', 'Low Rating'))
    plt.title('Venn Diagram ')
    plt.show()


# --- Pareto chart ----------------------------------------------------------
# Cumulative share of movies, following the descending genre order
cumulative_percent = 100 * genre_counts.cumsum() / genre_counts.sum()

fig, ax1 = plt.subplots(figsize=(10, 6))

# Bar plot for frequency
ax1.bar(genre_counts.index, genre_counts, color='skyblue')
ax1.set_ylabel('Frequency', color='skyblue')

# Line plot for cumulative percentage on a secondary y-axis
ax2 = ax1.twinx()
ax2.plot(genre_counts.index, cumulative_percent, color='orange', marker='o', linestyle='-')
ax2.set_ylabel('Cumulative Percentage (%)', color='orange')

ax1.set_title('Pareto Chart ')
ax1.set_xlabel('Genre')

# Rotate the labels on ax1 explicitly: after twinx() the current axes is ax2,
# whose x-axis is hidden, so plt.xticks() would not touch the visible labels.
plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
plt.show()


# --- Donut chart -----------------------------------------------------------
plt.figure(figsize=(8, 8))
plt.pie(genre_counts, labels=genre_counts.index, autopct='%1.1f%%', startangle=90, colors=plt.cm.tab20.colors)

# Draw a white circle at the center to create the donut hole
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
plt.gca().add_artist(centre_circle)

plt.title('Donut Chart ')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.show()


# --- Exploded view diagram -------------------------------------------------
plt.figure(figsize=(10, 6))
# Marker area grows with the movie's rating
plt.scatter(df['Year'], df['Revenue'], s=df['Rating'] * 20, alpha=0.5)
plt.xlabel('Year')
plt.ylabel('Revenue')
plt.title('Exploded View diagram ')
# Add annotations for movie titles
for title, year, revenue in zip(df['Title'], df['Year'], df['Revenue']):
    plt.annotate(title, (year, revenue), xytext=(5, -5), textcoords='offset points', fontsize=8)
plt.grid(True)
plt.show()


# --- Pictorial percentage diagram ------------------------------------------
plt.figure(figsize=(8, 8))
plt.pie(genre_counts, labels=genre_counts.index, autopct='%1.1f%%', startangle=90, colors=plt.cm.tab20.colors)
plt.title('Pictorial Percentage Diagram ')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.show()


# --- Table charts ----------------------------------------------------------
def _table_figure(title):
    """Return a Plotly figure that shows the movie DataFrame as a table.

    Args:
        title: Text used as the figure's layout title.
    """
    columns = ["Title", "Genre", "Year", "Rating", "Revenue"]
    table_trace = go.Table(
        header=dict(values=columns),
        cells=dict(values=[df[column] for column in columns]),
    )
    figure = go.Figure(data=[table_trace])
    figure.update_layout(title=title)
    return figure


_table_figure("Movie Dataset").show()
_table_figure("Table chart").show()